# Description ------------------------------------------------------------------

### This script reads and cleans the mobile money data, for merging with
### market_month and market_only_data

# Settings ---------------------------------------------------------------------
# Relative folder (with trailing slash) that the raw input files are read from.
data_path <- "data/1_raw/"

# Functions --------------------------------------------------------------------
source("scripts/0_functions/functions_cleaning.R")

# Packages ---------------------------------------------------------------------
# If a package is not installed, install it first with
# install.packages("package_name"); the package name must be in quotes.
#
library(dplyr)
library(readr)
library(stringr)
library(labelled)
library(lubridate)

# Data -------------------------------------------------------------------------
# Both inputs are CSV files in the "Mobile Money" sub-folder of data_path.
market_nums <- paste0(data_path,
                      "Mobile Money/Market Names and Numbers.csv") |>
  read_csv()
mobile_mon <- paste0(data_path,
                     "Mobile Money/Mobile Money Transactions Compiled.csv") |>
  read_csv()

################################################################################
# Merging Market Names In ------------------------------------------------------
################################################################################
# Prepare the transaction table for the join:
#  * recode the district spelling "Mbelwa" to "M'mbelwa"
#    (presumably the spelling used in market_nums -- verify)
#  * keep the market number exactly as read under Market_Number_statement and
#    add a numeric copy (Market_Number_num) to join on
mobile_mon <- mobile_mon |>
  mutate(District = replace(District, District == "Mbelwa", "M'mbelwa")) |>
  rename(Market_Number_statement = Market_Number) |>
  mutate(Market_Number_num = as.numeric(Market_Number_statement))

# Full join on market number and district, so that markets without any
# transaction and transactions without a known market are both kept.
mobile_mon <- market_nums |>
  full_join(mobile_mon,
            by = c("Market_Number" = "Market_Number_num",
                   "District" = "District"))

################################################################################
# Cleaning ---------------------------------------------------------------------
################################################################################
# One pass over the table:
#  * a transaction type recorded as "n/a" is set to "Merchant Payment"
#    whenever the market name is known
#  * Transaction_Date is parsed from month/day/year text into POSIXct
#  * month name, month number, year and day are derived from the parsed date
mobile_mon <- mobile_mon |>
  mutate(
    Transaction_Type = ifelse(
      Transaction_Type == "n/a" & !is.na(Market_Name),
      "Merchant Payment",
      Transaction_Type
    ),
    Transaction_Date = as.POSIXct(Transaction_Date, format = "%m/%d/%Y"),
    month_str = months(Transaction_Date),
    month_num = month(Transaction_Date),
    year = year(Transaction_Date),
    day = day(Transaction_Date)
  )

# Flag every row that is an exact repeat (all columns equal) of an earlier row.
is_repeat <- duplicated(mobile_mon)
# IDs of the repeated rows, kept for manual inspection;
# they seem to be rollbacks and bankings?
dupl_trans <- mobile_mon$Transaction_ID[is_repeat]
#View(mobile_mon[mobile_mon$Transaction_ID %in% dupl_trans,])
# Keep only the first occurrence of each row.
mobile_mon <- mobile_mon[!is_repeat, ]

################################################################################
# Aggregating Measures of Interest to Market-Month Level -----------------------
################################################################################

# For aggregation, keep only Merchant Payments and sum them per market-month.
#
# filter() is used instead of `mobile_mon[cond, ]` on purpose: rows whose
# Transaction_Type is NA (e.g. markets that came in through the full join
# without any transaction) make `cond` NA, and logical subsetting with NA
# returns rows in which *every* column is NA. Those rows would then be counted
# as a phantom all-NA "market" by n(). filter() simply drops them.
#
# Markets without any transaction therefore do not appear in this table; they
# are added back (with zeros) through the market x month grid further down.
#
# Output columns:
#   mob_mon_transf   - sum of Transaction_Amount (missing amounts ignored)
#   mob_mon_trnsctns - number of merchant-payment rows
mobile_mon_monthly <- mobile_mon %>%
  filter(Transaction_Type == "Merchant Payment") %>%
  group_by(Market_Name, Market_Number, District, year, month_str, month_num) %>%
  summarise(mob_mon_transf = sum(Transaction_Amount, na.rm = TRUE),
            mob_mon_trnsctns = n(),
            .groups = "drop") %>%
  arrange(Market_Name, year, month_num)

#now merge a market, month-year data frame back in to note when
#a market had 0 transactions in a month where it possibly could have had some

# All distinct "<month name> <month number> <year>" combinations in the data.
year_month <- unique(paste(mobile_mon$month_str,
                           mobile_mon$month_num,
                           mobile_mon$year,
                           sep = " "))
# NOTE(review): this drops the LAST combination purely by position. Presumably
# it is meant to remove the "NA NA NA" entry produced by rows without a
# transaction date -- confirm, because it depends on the row order of
# mobile_mon and would silently drop a real month if that order changes.
year_month <- year_month[-length(year_month)]

# All distinct "<market name> <market number> <district>" combinations.
market_info <- unique(paste(market_nums$Market_Name,
                            market_nums$Market_Number,
                            market_nums$District,
                            sep = " "))

# Every market paired with every month (market varies fastest).
year_month <- expand.grid(market_info = market_info,
                          year_month = year_month)

# Split the period string once and reuse the pieces.
period_parts <- str_split(as.character(year_month$year_month), " ",
                          simplify = TRUE)
year_month$month_num <- as.numeric(period_parts[, 2])
year_month$month_str <- period_parts[, 1]
year_month$year <- as.numeric(period_parts[, 3])

# Split the market string with ONE anchored pattern: name, 9-digit market
# number, district. This replaces the earlier split on the literal " 9", which
# assumed every market number starts with 9 and would cut a market name short
# if the name itself contained " 9".
market_parts <- str_match(as.character(year_month$market_info),
                          "^(.*?) (\\d{9}) (.*)$")
year_month$Market_Name <- market_parts[, 2]
year_month$Market_Number <- as.numeric(market_parts[, 3])
year_month$District <- market_parts[, 4]

# Combine the full market x month grid with the observed monthly totals.
# A full join keeps grid cells without transactions as well as aggregated
# rows that have no counterpart in the grid.
mobile_mon_monthly <- year_month |>
  full_join(mobile_mon_monthly,
            by = c("Market_Name", "year", "month_str", "month_num",
                   "District", "Market_Number"))

# Rows where BOTH measures are missing are set to zero for both measures.
no_activity <- is.na(mobile_mon_monthly$mob_mon_transf) &
  is.na(mobile_mon_monthly$mob_mon_trnsctns)
mobile_mon_monthly[no_activity, c("mob_mon_transf", "mob_mon_trnsctns")] <- 0

# Keep a copy that still contains the rows without a market name, then
# restrict the main table to rows whose market name is known.
mobile_mon_monthly_incl_unknwn_mkts <- mobile_mon_monthly
has_market_name <- !is.na(mobile_mon_monthly$Market_Name)
mobile_mon_monthly <- mobile_mon_monthly[has_market_name, ]

# Build the yr_mnth_mkt ID: <year>_<two-digit month>_<market name with
# spaces replaced by underscores>.
mobile_mon_monthly <- mobile_mon_monthly |>
  mutate(yr_mnth_mkt = paste(year,
                             str_pad(month_num, width = 2,
                                     side = "left", pad = "0"),
                             gsub(" ", "_", Market_Name, fixed = TRUE),
                             sep = "_"))

# Saving -----------------------------------------------------------------------
# Write the transaction-level table and both monthly tables to one .RData file.
save(list = c("mobile_mon",
              "mobile_mon_monthly",
              "mobile_mon_monthly_incl_unknwn_mkts"),
     file = paste0("data/2_clean/","mobile_money.RData"))

